## Code that creates the combined rolling regression results presented in Figure 5 in the main text. 
## We here analyze both candidacy and turnout. 
## The first part of the script creates the data used for the plots. 

## Last changed: 2025-05-23 (KOL)

sink("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Output/Figure5.txt", split=TRUE)

library(PSweight)
library(cobalt)
library(MatchIt)
library(distances)
library(moreparty)
library(party)
library(caret)
library(bonsai)
library(randomForest)
library(tidymodels)
library(tidyverse)
library(rmarkdown)
library(tinytex)
library(tibble)
library(knitr)
library(car)
library(fst)
library(stargazer)
library(pryr)
library(stringr)
library(fst)
library(lubridate)
library(data.table)
library(summarytools)
library(ggplot2)
library(cowplot)
library(ggpubr)
library(haven)
library(broom)
library(fixest)
library(modelsummary)
library(gt)
library(MASS)
library(survey)
library(collapse)
library(readxl)
library(Hmisc)
library(DescTools)
library(ranger)
library(stablelearner)
library(future.apply)
library(lme4)
library(partykit)
library(ggiplot)



##################  First assemble data and study candidacy##################################################
# Specify the name of the treatment variable
  tvar <- "T94_90"

# Specify the the birth cohorts to be used for the analysis, note that 
# the rolling regression framework forces us to widen our standard range with +/- 3 years.
  lbyr <- 1939
  ubyr <- 1970

# Specify if only include Swedish citizens 
  citizen <- 1

# Specify the propensity score specification
  psformula <- formula(T94 ~
                       Kon + DAStod_1982 + DAStod_1983 + DAStod_1984 +
                       DAStod_1985 + DAStod_1986 + DAStod_1987 + DAStod_1988 +
                       DAStod_1989 +
                       DAntBarn_91 + Sambo_91 + InvBak  |
                       SSYK3_90 + SNI2_91 + Kommun_91 +
                       rCARB_1982 + rCARB_1983 + rCARB_1984 + rCARB_1985 +
                       rCARB_1986 + rCARB_1987 + rCARB_1988 + rCARB_1989 +
                       pArbInk_1990 + pArbInk_1991 + UtbAr_91 +
                       FodAr + Nom82 + Nom85 + Nom88 + Nom91)

  Models <-  vector('list', 10)
  j <- 1
  urval <- 1

  dbRoll <- data.table()
  
## Loop over all birth years from 1942-1967, and estimate the effect on the likelihood of candidacy.  
for(ald in seq(1942, 1967)) { 
  for(ses in c(4:4)) {    
    
    # Read in the data sets
    NomVald <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_NomVald.fst", as.data.table=T)
    RTB <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_RTB.fst", as.data.table=T)
    db <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_Nom.fst", as.data.table=T)
    
    db[, T94 := get(tvar)]
    mdb <- db[between(FodAr, lbyr, ubyr) & (SvMedb82_18 >= citizen) & (AllaVal >= urval),]
    rm(db)
    gc()
    
    print(ses)
    
    mdb[!is.na(avSES3), ravSES := cut_number(avSES3, 4, labels = FALSE)]
    if(ses==1) mdb <- mdb[between(ravSES, 1, 1)]
    if(ses==2) mdb <- mdb[between(ravSES, 2, 2)]
    if(ses==3) mdb <- mdb[between(ravSES, 3, 3)]
    if(ses==4) mdb <- mdb[between(ravSES, 4, 4)]
    
    # Use a window of +/-3 years for the rolling regression model.
    albyr <- ald-3
    aubyr <- ald+3
    
    mdb <- mdb[between(FodAr, albyr, aubyr), ]
    print(qsu(mdb[, FodAr]))
    
    # Use logit model to estimate propensity scores
    mdb <- mdb[complete.cases(mdb),]
    tm <- proc.time()
    logmod <- feglm(psformula,  
                    data = mdb, family="binomial")
    proc.time()-tm
    summary(logmod)
    
    mdb[]
    
    # Extract predicted probability (propensity scores)
    if(logmod$nobs == logmod$nobs_origin){ 
      mdb[, pT94 := predict(logmod, type = "response")]
    }else{
      mdb[logmod$obs_selection$obsRemoved, pT94 := predict(logmod, type = "response")]
    }
    
    mdb <- mdb[complete.cases(mdb),]
    
    # Now use the Full matching approach of Sävje et al. to obtain ATT and ATE weights.  
    
    tmp <- proc.time()
    set.seed(7892)
    m.out <- matchit(T94~1, data = mdb,
                     distance = mdb$pT94, method = "quick", estimand = "ATT")
    proc.time()-tmp
    m.data1 <- as.data.table(match.data(m.out))[, .(LopNr, weights)]
    setnames(m.data1, "weights", "gm_att")
    
    mdb <- m.data1[, .(LopNr, gm_att)][mdb, on = "LopNr"]
    gc()
    RTB <- mdb[RTB, on = "LopNr"]
    
    rm(mdb)
    gc()
    
    rm(list=setdiff(ls(), c("NomVald", "RTB", "rtbvar", "Models", "j", "tvar", "lbyr", "ubyr", "citizen", "psformula", "urval", "deso91", "ald", "dbRoll", "ses",
                            "g3", "g4", "g0", "g1", "g5", "g6", "g0b", "g1b")))
    gc()
    
    
    mdb <- RTB[!is.na(gm_att), 
               .(Kon, Nom82, Nom85, Nom88, Nom91, Sysselsatt_85, DAntBarn_91, Sambo_91, 
                 DStud_91, DForLed_91, DSjukRe_91, DSocBidrFam_91, DBostBidrFam_91,
                 UtbAr_91, FodAr, InvBak, isei, Yrke80_90, SNI2_90, Kommun_91, fp_LoneInk_85, fp_LoneInk_90, fp_LoneInk_91,
                 gm_att, Nom, LopNr, T94, Ar, pT94)]
    
    gc()
    
    mdb[, Nom := Nom*100]
    mdb[, cID := .N, by=.(LopNr, Ar)]
     
    rm(RTB)
    gc()
    
    
    mdb[, Post := as.numeric(Ar>1991)]
    
    mb <- mean(mdb[Ar>1991 & T94==0, Nom], na.rm=T)
    print(mb)
    
    # Estimate the short-run effects on candidacy, using our matching weights.
    mod <- feols(Nom~T94 | Ar, cluster ="LopNr", 
                 weights = mdb[Ar %in% c(1994, 1998, 2002, 2006), gm_att], 
                 mdb[Ar %in% c(1994, 1998, 2002, 2006 )])
    
    # Estimate the long-run effects on candidacy, using our matching weights.
    mod1 <- feols(Nom~T94 | Ar, cluster ="LopNr", 
                  weights = mdb[Ar %in% c(2010, 2014, 2018, 2022), gm_att], 
                  mdb[Ar %in% c(2010, 2014, 2018, 2022)])
    
    mb <- mean(mdb[Ar>1991 & T94==0, Nom], na.rm=T)
    print(mb)
    
    cmod <- data.table(b_sh = coef(mod)["T94"], b_lo = coef(mod1)["T94"], se_sh = se(mod)["T94"], se_lo = se(mod1)["T94"], 
                       ar = ald, ses=ses, avNom0 =mb)
    
    
    dbRoll <- rbindlist(list(dbRoll, cmod), use.names=T, fill=T )
    
    print(summary(mod))
    print(summary(mod1))
    print(uniqueN(mdb[T94==0, LopNr]))
    print(uniqueN(mdb[T94==1, LopNr]))
    print(uniqueN(mdb[, LopNr]))
    print(dim(mdb))
    print(qsu(mdb$cID))
    print(dbRoll)
    j <- j+1
  } 
}
  # Save the results for candidacy to a new data set that will be used when plotting the results.
  write_fst(dbRoll, "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/NomRollReg9406.fst")

############ Now we perform similar steps to obtain the rolling regression results for voter turnout #################################
  
  # Specify the name of the treatment variable
  tvar <- "T94_90"
  
  # Specify the the birth cohorts to be used for the analysis, note that 
  # the rolling regression framework forces us to widen our standard range with +/- 3 years.
  lbyr <- 1939
  ubyr <- 1967
  
  # Specify if only include Swedish citizens 
  citizen <- 1
  
  # Specify the propensity score specification
  psformula <- formula(T94 ~
                         Kon + DAStod_1982 + DAStod_1983 + DAStod_1984 +
                         DAStod_1985 + DAStod_1986 + DAStod_1987 + DAStod_1988 +
                         DAStod_1989 +
                         DAntBarn_91 + Sambo_91 + InvBak |
                         UtbAr_91 + SSYK3_90 + SNI2_91 + Kommun_91 +
                         rCARB_1982 + rCARB_1983 + rCARB_1984 + rCARB_1985 +
                         rCARB_1986 + rCARB_1987 + rCARB_1988 + rCARB_1989 +
                         pArbInk_1990 + pArbInk_1991 +
                         FodAr + Rost82)
  
  ## Estimate two models, one for the complete sample including all observations,
  ## the second on the restricted sample only including individuals for which
  ## we can observe all elections bw 82-18 in which they are eligible to run for office. 
  
  Models <-  vector('list', 5)
  j <- 1
  
  dbRoll <- data.table()
  
  for(ald in seq(1967, 1942)) {
    for(ses in 1:1) { 
      # Read in the data sets
      NomVald <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_NomVald.fst", as.data.table=T)
      RTB <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_RTB.fst", as.data.table=T)
      db <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_Nom.fst", as.data.table=T)
      
      db[, T94 := get(tvar)]
      mdb <- db[between(FodAr, lbyr, ubyr) & (SvMedb82_18 >= citizen),]
      rm(db)
      gc()
      
      VD82  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_82.fst", as.data.table = T)
      VD82 <- VD82[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
      VD94r <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_1994rkl.fst", as.data.table = T)
      VD94r <- VD94r[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
      VD10  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_2010.fst", as.data.table = T)
      VD10 <- VD10[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
      VD18  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_2018.fst", as.data.table = T)
      VD18 <- VD18[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
      VD22  <- read.fst("D:/SCB_ConPol/Fst/VD/Valdelt_2022.fst", as.data.table = T)
      VD22 <- VD22[, cID := .N, by = LopNr][cID == 1 & !is.na(LopNr), ][, cID := NULL]
      
      VD82[r != 3 & !is.na(r), RRost := as.numeric(r %in% c(2, 4, 5, 6))]
      VD82[, Ar := 1982]
      VD94r[Rostratt == 1, RRost := as.numeric(r %in% c(2, 4, 5, 6))]
      VD94r[, Ar := 1994]
      VD10[Rostratt == 1, RRost := as.numeric(r %in% c(2, 4, 5, 6))]
      VD10[, Ar := 2010]
      VD18[Rostratt == 1, RRost := as.numeric(Rrost)]
      VD18[, Ar := 2018]
      VD22[Rostratt == 1, RRost := as.numeric(RD)]
      VD22[, Ar := 2022]
      VD <- rbindlist(list(VD82[, .(LopNr, RRost, Ar)], VD94r[, .(LopNr, RRost, Ar)],
                           VD10[, .(LopNr, RRost, Ar)], VD18[, .(LopNr, RRost, Ar)], VD22[, .(LopNr, RRost, Ar)]))
      
      VD[!is.na(RRost) & Ar>1982, AntVal := .N, by= LopNr]
      VD[, .N, by=AntVal]
      
      for(k in 1982:1989) {
        mdb[get(paste0("CARB_", k)) > 0, paste0("rCARB_", k) := cut_number(get(paste0("CARB_", k)), 100, labels = FALSE)]
        mdb[is.na(get(paste0("rCARB_", k))), paste0("rCARB_", k) := 0]
      }
      
      antVD <- unique(VD[!is.na(AntVal), .(LopNr, AntVal)])
      
      setnames(VD82, "RRost", "Rost82")
      mdb <- VD82[, .(LopNr, Rost82)][mdb, on = "LopNr"]
      mdb[FodAr>1964, Rost82 := -99]
      mdb[FodAr==1964 & FodMan>8, Rost82 := -99]
      mdb[is.na(Rost82), Rost82:=-99]
      
      mdb <- antVD[mdb, on = "LopNr"]
      mdb[is.na(AntVal), AntVal := 0]
      mdb <- mdb[AntVal==4,]
      
      rm(VD82, VD94r, VD10, VD18)
      gc()
      
      # Split the data in SES quartiles
      mdb[!is.na(avSES3), ravSES := cut_number(avSES3, 4, labels = FALSE)]
      
      if(ses==1) mdb <- mdb[between(ravSES, 1, 1)]
      if(ses==2) mdb <- mdb[between(ravSES, 2, 2)]
      if(ses==3) mdb <- mdb[between(ravSES, 3, 3)]
      if(ses==4) mdb <- mdb[between(ravSES, 4, 4)]
      
      albyr <- ald-3
      aubyr <- ald+3
      
      mdb <- mdb[between(FodAr, albyr, aubyr), ]
      
      print(qsu(mdb[, .(avSES, UtbAr_91, AntVal)]))
      
      # Use logit model to estimate propensity scores
      mdb <- mdb[complete.cases(mdb),]
      tm <- proc.time()
      logmod <- feglm(psformula,  
                      data = mdb, family="binomial")
      proc.time()-tm
      print(summary(logmod))
      
      # Extract predicted probability (propensity scores)
      if(logmod$nobs == logmod$nobs_origin){ 
        mdb[, pT94 := predict(logmod, type = "response")]
      }else{
        mdb[logmod$obs_selection$obsRemoved, pT94 := predict(logmod, type = "response")]
      }
      
      mdb <- mdb[complete.cases(mdb),]
      print(qsu(mdb[, .(FodAr, UtbAr_91, AntVal)]))
      
      # Now use the Full matching approach of Sävje et al. to obtain ATT and ATE weights.  
      
      tmp <- proc.time()
      set.seed(7892)
      m.out <- matchit(T94~1, data = mdb,
                       distance = mdb$pT94, method = "quick", estimand = "ATT")
      proc.time()-tmp
      m.data1 <- as.data.table(match.data(m.out))[, .(LopNr, weights)]
      setnames(m.data1, "weights", "gm_att")
      
      mdb <- m.data1[, .(LopNr, gm_att)][mdb, on = "LopNr"]
      gc()
      
      RTB <- mdb[RTB, on = "LopNr"]
      
      rm(mdb)
      gc()
      
      rm(list=setdiff(ls(), c("NomVald", "RTB", "rtbvar", "Models", "j", "tvar", "lbyr", "ubyr", "citizen", "psformula", "VD", "ald", "dbRoll", "ses",
                              "g3", "g4", "g0", "g1", "g5", "g6", "g0b", "g1b")))
      gc()
      
      mdb <- RTB[!is.na(gm_att), 
                 .(Kon, Nom82, Nom85, Nom88, Nom91, Sysselsatt_85, DAntBarn_91, Sambo_91, 
                   DStud_91, DForLed_91, DSjukRe_91, DSocBidrFam_91, DBostBidrFam_91,
                   UtbAr_91, FodAr, InvBak, isei, Yrke80_90, SNI2_90, Kommun_91, fp_LoneInk_85, fp_LoneInk_90, fp_LoneInk_91,
                   gm_att, Nom, LopNr, T94, Ar, pT94)]
      
      gc()
      
      mdb <- VD[mdb, on = c("LopNr", "Ar")]
      
      mdb[, Nom := Nom*100]
      mdb[, RRost := RRost*100]
      
      rm(RTB)
      gc()
      
      
      mdb[, Post := as.numeric(Ar>1991)]
      
      # Estimate the short-run effects on turnout, using our matching weights.
      mod <- feols(RRost~T94+i(Post, T94, ref=0) | Ar, cluster ="LopNr", 
                   weights = mdb[Ar %in% c(1982, 1994), gm_att], 
                   mdb[Ar %in% c(1982, 1994)])
      
      # Estimate the long-run effects on turnout, using our matching weights.
      mod1 <- feols(RRost~T94+i(Post, T94, ref=0) | Ar, cluster ="LopNr", 
                    weights = mdb[Ar %in% c(1982, 2010, 2018, 2022), gm_att], 
                    mdb[Ar %in% c(1982, 2010, 2018, 2022)])
      
      
      
      mb <- mean(mdb[Ar>1991 & T94==0, RRost], na.rm=T)
      print(mb)
      
      cmod <- data.table(b_sh = coef(mod)["Post::1:T94"], b_lo = coef(mod1)["Post::1:T94"], se_sh = se(mod)["Post::1:T94"], se_lo = se(mod1)["Post::1:T94"], 
                         ar = ald, ses=ses, avNom0 =mb)
      
      dbRoll <- rbindlist(list(dbRoll, cmod), use.names=T, fill=T )
      
      print(summary(mod))
      print(uniqueN(mdb[T94==0, LopNr]))
      print(uniqueN(mdb[T94==1, LopNr]))
      print(dbRoll)
      j <- j+1
    }  
  }
  
  write_fst(dbRoll, "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/VDRollReg_4267.fst")
  
## Now use the results obtained above to create Figure 5 in the main text, which shows rolling regression results for candidacy and turnout.  
  
  dbRoll <- read_fst("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/VDRollReg_4267.fst", as.data.table=T)
  
  dbRoll[, Alder := 1992-ar]
  setnames(dbRoll, c("b_sh", "b_lo", "se_sh", "se_lo"), c("b_short", "b_long", "se_short", "se_long"))
  
  dbRoll[, ciL := b_short-1.96*se_short]
  dbRoll[, ciH := b_short+1.96*se_short]
  dbRoll[, ciL2 := b_long-1.96*se_long]
  dbRoll[, ciH2 := b_long+1.96*se_long]
  
  g0 <- ggplot(dbRoll[ses==1]) +
    geom_ribbon(aes(y=b_short, x=Alder, ymax=ciH, ymin=ciL), alpha=.1) +
    geom_line(aes(Alder, b_short)) +
    geom_point(aes(Alder, b_short)) +
    theme_classic(base_size = 14) +
    xlab("Age in 1992") +
    ylab("Job Loss Effect (pp.)")+
    ggtitle("Turnout, Short-Run Effect (1994), SES Q1" ) +
    scale_y_continuous(
      limits = c(-6, 1),
      breaks = seq(-6, 1, by = 1),
      labels = function(x) sprintf("%.1f", x)
    ) +
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
  
  g0 
  
  g1 <- ggplot(dbRoll[ses==1]) +
    geom_ribbon(aes(y=b_long, x=Alder, ymax=ciH2, ymin=ciL2), alpha=.1) +
    geom_line(aes(Alder, b_long)) +
    geom_point(aes(Alder, b_long)) +
    theme_classic(base_size = 14) +
    xlab("Age in 1992") +
    ylab("Job Loss Effect (pp.)")+
    ggtitle("Turnout, Long-Run Effect (2010, 2018, 2022), SES Q1")+
    scale_y_continuous(
      limits = c(-6, 1),
      breaks = seq(-6, 1, by = 1),
      labels = function(x) sprintf("%.1f", x)
    ) +
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
  
  g1 
  
  dbRoll <- read_fst("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/NomRollReg9406.fst", as.data.table=T)
  
  setnames(dbRoll, c("b_sh", "b_lo", "se_sh", "se_lo"), c("b_short", "b_long", "se_short", "se_long"))
  
  dbRoll[, ciL := b_short-1.96*se_short]
  dbRoll[, ciH := b_short+1.96*se_short]
  dbRoll[, ciL2 := b_long-1.96*se_long]
  dbRoll[, ciH2 := b_long+1.96*se_long]
  
  dbRoll[, Alder := 1992-ar]
  g3 <- ggplot(dbRoll[ses==4]) +
    geom_ribbon(aes(y=b_short, x=Alder, ymax=ciH, ymin=ciL), alpha=.1) +
    geom_line(aes(Alder, b_short)) +
    geom_point(aes(Alder, b_short)) +
    theme_classic(base_size = 14) +
    xlab("Age in 1992") +
    ylab("Job Loss Effect (pp.)")+
    ggtitle("Candidacy, Short-Run Effect (1994-2006), SES Q4" ) +
    scale_y_continuous(limits = c(-.2, 1.65), breaks = round(seq(-.2, 1.6, by = .2), digits=1)) +
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
  
  g3 
  
  g4 <- ggplot(dbRoll[ses==4]) +
    geom_ribbon(aes(y=b_long, x=Alder, ymax=ciH2, ymin=ciL2), alpha=.1) +
    geom_line(aes(Alder, b_long)) +
    geom_point(aes(Alder, b_long)) +
    theme_classic(base_size = 14) +
    xlab("Age in 1992") +
    ylab("Job Loss Effect (pp.)")+
    ggtitle("Candidacy, Long-Run Effect (2010-2022), SES Q4" ) +
    scale_y_continuous(limits = c(-.2, 1.65), breaks = round(seq(-.2, 1.6, by = .2), digits=1)) +
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
  
  g4 

  
  # Combine the four separate plots in one graph, this is Figure 5.
  ggarrange(g3, g4, g0, g1)
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/Figure5.pdf", width = 15, height =10, units = "in")
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/Figure5.eps", width = 15, height =10, units = "in",  device=cairo_ps)
  
sink()